Load libraries
# Load libraries (install first if needed)
library(tidyverse); theme_set(theme_light(base_size = 12))
#> ââ Attaching packages âââââââââââââââââââââââââââââââââââââââ tidyverse 1.3.1 ââ
#> â ggplot2 3.3.5 â purrr 0.3.4
#> â tibble 3.1.5 â dplyr 1.0.7
#> â tidyr 1.1.4 â stringr 1.4.0
#> â readr 2.1.1 â forcats 0.5.1
#> ââ Conflicts ââââââââââââââââââââââââââââââââââââââââââ tidyverse_conflicts() ââ
#> x dplyr::filter() masks stats::filter()
#> x dplyr::lag() masks stats::lag()
library(tidylog)
#>
#> Attaching package: 'tidylog'
#> The following objects are masked from 'package:dplyr':
#>
#> add_count, add_tally, anti_join, count, distinct, distinct_all,
#> distinct_at, distinct_if, filter, filter_all, filter_at, filter_if,
#> full_join, group_by, group_by_all, group_by_at, group_by_if,
#> inner_join, left_join, mutate, mutate_all, mutate_at, mutate_if,
#> relocate, rename, rename_all, rename_at, rename_if, rename_with,
#> right_join, sample_frac, sample_n, select, select_all, select_at,
#> select_if, semi_join, slice, slice_head, slice_max, slice_min,
#> slice_sample, slice_tail, summarise, summarise_all, summarise_at,
#> summarise_if, summarize, summarize_all, summarize_at, summarize_if,
#> tally, top_frac, top_n, transmute, transmute_all, transmute_at,
#> transmute_if, ungroup
#> The following objects are masked from 'package:tidyr':
#>
#> drop_na, fill, gather, pivot_longer, pivot_wider, replace_na,
#> spread, uncount
#> The following object is masked from 'package:stats':
#>
#> filter
library(rnaturalearth)
library(rnaturalearthdata)
library(ggsidekick) # devtools::install_github("seananderson/ggsidekick")
library(RColorBrewer)
library(forcats)
library(viridis)
#> Loading required package: viridisLite
#>
#> Attaching package: 'viridis'
#> The following object is masked from 'package:viridisLite':
#>
#> viridis.map
library(rnaturalearth)
library(rnaturalearthdata)
library(broom)
library(rgdal)
#> Loading required package: sp
#> rgdal: version: 1.5-18, (SVN revision 1082)
#> Geospatial Data Abstraction Library extensions to R successfully loaded
#> Loaded GDAL runtime: GDAL 3.1.4, released 2020/10/20
#> Path to GDAL shared files: /Users/maxlindmark/Library/R/4.0/library/rgdal/gdal
#> GDAL binary built with GEOS: TRUE
#> Loaded PROJ runtime: Rel. 6.3.1, February 10th, 2020, [PJ_VERSION: 631]
#> Path to PROJ shared files: /Users/maxlindmark/Library/R/4.0/library/rgdal/proj
#> Linking to sp version:1.4-4
#> To mute warnings of possible GDAL/OSR exportToProj4() degradation,
#> use options("rgdal_show_exportToProj4_warnings"="none") before loading rgdal.
library(ggmap)
#> Google's Terms of Service: https://cloud.google.com/maps-platform/terms/.
#> Please cite ggmap if you use it! See citation("ggmap") for details.
library(sf)
#> Linking to GEOS 3.8.1, GDAL 3.1.4, PROJ 6.3.1
library(png)
library(patchwork)
library(ggridges)
Read data
d <- readr::read_csv("https://raw.githubusercontent.com/maxlindmark/perch_growth_gradient/master/data/for_analysis/dat.csv") %>% dplyr::select(-...1)
#> New names:
#> * `` -> ...1
#> Rows: 362198 Columns: 12
#> ââ Column specification ââââââââââââââââââââââââââââââââââââââââââââââââââââââââ
#> Delimiter: ","
#> chr (5): age_ring, area, gear, ID, sex
#> dbl (7): ...1, length_mm, age_bc, age_catch, catch_year, cohort, final_length
#>
#> âč Use `spec()` to retrieve the full column specification for this data.
#> âč Specify the column types or set `show_col_types = FALSE` to quiet this message.
d
glimpse(d)
#> Rows: 362,198
#> Columns: 11
#> $ length_mm <dbl> 62, 115, 153, 168, 184, 51, 110, 143, 155, 163, 71, 112, âŠ
#> $ age_bc <dbl> 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 1, 2, 3, 4, 5, âŠ
#> $ age_catch <dbl> 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4, 5, 5, 5, 5, 5, âŠ
#> $ age_ring <chr> "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "YâŠ
#> $ area <chr> "BT", "BT", "BT", "BT", "BT", "BT", "BT", "BT", "BT", "BTâŠ
#> $ catch_year <dbl> 1977, 1977, 1977, 1977, 1977, 1977, 1977, 1977, 1977, 197âŠ
#> $ cohort <dbl> 1972, 1972, 1972, 1972, 1972, 1972, 1972, 1972, 1972, 197âŠ
#> $ final_length <dbl> 184, 184, 184, 184, 184, 163, 163, 163, 163, 163, 178, 17âŠ
#> $ gear <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NâŠ
#> $ ID <chr> "1977_1_BT", "1977_1_BT", "1977_1_BT", "1977_1_BT", "1977âŠ
#> $ sex <chr> "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "0", "0âŠ
# Use only length-at-age by filtering on age_ring
d <- d %>% filter(age_ring == "Y")
#> filter: removed 28,413 rows (8%), 333,785 rows remaining
Plot data
All data
ggplot(d, aes(age_bc, length_mm, color = area)) +
geom_jitter(height = 0, alpha = 0.2)

ggplot(d, aes(age_bc, length_mm, color = area)) +
geom_jitter(height = 0, alpha = 0.2) +
guides(color = "none") +
facet_wrap(~ area)

Sample locations & length of time series
# Map plot
theme_set(theme_light(base_size = 12))
# Read UTM function
LongLatToUTM <- function(x, y, zone){
xy <- data.frame(ID = 1:length(x), X = x, Y = y)
coordinates(xy) <- c("X", "Y")
proj4string(xy) <- CRS("+proj=longlat +datum=WGS84") ## for example
res <- spTransform(xy, CRS(paste("+proj=utm +zone=",zone," ellps=WGS84",sep='')))
return(as.data.frame(res))
}
# Specify ranges for big map
ymin = 52; ymax = 67; xmin = 11; xmax = 25
map_data <- rnaturalearth::ne_countries(
scale = "medium",
returnclass = "sf", continent = "europe")
# Crop the polygon for plotting and efficiency:
# st_bbox(map_data) # find the rough coordinates
swe_coast <- suppressWarnings(suppressMessages(
st_crop(map_data,
c(xmin = xmin, ymin = ymin, xmax = xmax, ymax = ymax))))
# Transform our map into UTM 33 coordinates, which is the equal-area projection we fit in:
utm_zone33 <- 32633
swe_coast_proj <- sf::st_transform(swe_coast, crs = utm_zone33)
# Add point to areas
sort(unique(d$area))
#> [1] "BS" "BT" "FB" "FM" "HO" "JM" "MU" "RA" "SI_EK"
#> [10] "SI_HA" "TH" "VN"
df <- data.frame(Area = c("Brunskar (BS)", "Biotest (BT)", "Finbo (FB)", "Forsmark (FM)",
"Holmon (HO)", "Kvadofjarden (JM)", "Musko (MU)", "Ranea (RA)",
"Simpevarp 1 (SI_EK", "Simpevarp 2 (SI_HA", "Torhamn (TH)", "Vino (VN)"),
lon = c(21.5, 18.1, 19.5, 18, 20.9, 16.8, 18.1, 22.3, 16.6, 16.7, 15.9, 16.9),
lat = c(60, 60.4, 60.3, 60.5, 63.7, 58, 59, 65.9, 57.3, 57.4, 56.1, 57.5))
# Add UTM coords
utm_coords <- LongLatToUTM(df$lon, df$lat, zone = 33)
#> Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj =
#> prefer_proj): Discarded datum Unknown based on WGS84 ellipsoid in CRS definition
df$X <- utm_coords$X
df$Y <- utm_coords$Y
# Crop the plot
xmin2 <- 330000; xmax2 <- 959000; xrange <- xmax - xmin
ymin2 <- 6000000; ymax2 <- 7300000; yrange <- ymax - ymin
ggplot(swe_coast_proj) +
geom_sf() +
coord_sf(xlim = c(xmin2, xmax2), ylim = c(ymin2, ymax2)) +
geom_point(data = df, aes(x = X, y = Y, color = Area), size = 3) +
labs(x = "Longitude", y = "Latitude") +
scale_color_brewer(palette = "Paired") +
NULL

Sample sizes
# Average sample size by ID
d %>%
group_by(cohort, area, ID) %>%
summarise(n = n()) %>%
ungroup() %>%
ggplot(., aes(x = n, y = area, n, fill = area)) +
geom_density_ridges(stat = "binline", bins = 20, scale = 1, draw_baseline = FALSE, alpha = 0.8) +
scale_fill_brewer(palette = "Paired") +
guides(fill = "none") +
NULL
#> group_by: 3 grouping variables (cohort, area, ID)
#> summarise: now 91,595 rows and 4 columns, 2 group variables remaining (cohort, area)
#> ungroup: no grouping variables

# Sample size by gear (some overlappning gears with different names)
d %>%
group_by(gear, area) %>%
summarise(n = n()) %>%
ggplot(., aes(factor(gear), n, fill = area)) +
geom_bar(stat = "identity") +
guides(fill = "none") +
facet_wrap(~area, scales = "free") +
scale_fill_brewer(palette = "Paired") +
theme(axis.text.x = element_text(angle = 90)) +
NULL
#> group_by: 2 grouping variables (gear, area)
#> summarise: now 37 rows and 3 columns, one group variable remaining (gear)

# Plot sample size by area and cohort (all length-at-ages)
d %>%
group_by(cohort, area) %>%
summarise(n = n()) %>%
ggplot(., aes(cohort, n, fill = area)) +
geom_bar(stat = "identity") +
scale_fill_brewer(palette = "Paired") +
NULL
#> group_by: 2 grouping variables (cohort, area)
#> summarise: now 505 rows and 3 columns, one group variable remaining (cohort)

d %>%
group_by(cohort, area) %>%
summarise(n = n()) %>%
ggplot(., aes(cohort, n, color = area)) +
geom_line() +
scale_color_brewer(palette = "Paired") +
facet_wrap(~area) +
guides(color = "none") +
theme(axis.text.x = element_text(angle = 90)) +
NULL
#> group_by: 2 grouping variables (cohort, area)
#> summarise: now 505 rows and 3 columns, one group variable remaining (cohort)

# Plot sample size by area and catch_year
d %>%
group_by(catch_year, area) %>%
summarise(n = n()) %>%
ggplot(., aes(catch_year, n, fill = area)) +
geom_bar(stat = "identity") +
scale_fill_brewer(palette = "Paired") +
NULL
#> group_by: 2 grouping variables (catch_year, area)
#> summarise: now 371 rows and 3 columns, one group variable remaining (catch_year)

d %>%
group_by(catch_year, area) %>%
summarise(n = n()) %>%
ggplot(., aes(catch_year, n, color = area)) +
geom_line() +
scale_color_brewer(palette = "Paired") +
facet_wrap(~area) +
guides(color = "none") +
theme(axis.text.x = element_text(angle = 90)) +
NULL
#> group_by: 2 grouping variables (catch_year, area)
#> summarise: now 371 rows and 3 columns, one group variable remaining (catch_year)

Trends in length-at-age
# All ages
d %>%
group_by(age_bc, area) %>%
ggplot(., aes(catch_year, length_mm, color = area)) +
geom_point(size = 0.1, alpha = 0.5) +
facet_wrap(~age_bc) +
scale_color_brewer(palette = "Paired", name = "Area") +
theme(axis.text.x = element_text(angle = 90),
legend.position = "bottom") +
guides(color = guide_legend(override.aes = list(size = 2))) +
NULL
#> group_by: 2 grouping variables (age_bc, area)

# Filter common ages
d %>%
group_by(age_bc, area) %>%
filter(age_bc < 13) %>%
ggplot(., aes(catch_year, length_mm, color = factor(age_bc))) +
geom_point(size = 0.1, alpha = 0.5) +
facet_wrap(~area, ncol = 6) +
scale_color_brewer(palette = "Paired", name = "Age") +
theme(axis.text.x = element_text(angle = 90),
legend.position = "bottom") +
guides(color = guide_legend(override.aes = list(size = 2))) +
NULL
#> group_by: 2 grouping variables (age_bc, area)
#> filter (grouped): removed 246 rows (<1%), 333,539 rows remaining

# Age-area grid
d %>%
group_by(age_bc, area) %>%
filter(age_bc < 7) %>%
ggplot(., aes(catch_year, length_mm, color = factor(age_bc))) +
geom_point(size = 0.1, alpha = 0.5) +
stat_smooth(aes(catch_year, length_mm, group = factor(age_bc)),
se = F, formula = y ~ s(x, k = 3), color = "grey30") +
facet_grid(age_bc~area) +
scale_color_brewer(palette = "Paired", name = "Age") +
theme(axis.text.x = element_text(angle = 90),
legend.position = "bottom") +
guides(color = guide_legend(override.aes = list(size = 2))) +
NULL
#> group_by: 2 grouping variables (age_bc, area)
#> filter (grouped): removed 14,410 rows (4%), 319,375 rows remaining
#> `geom_smooth()` using method = 'gam'

Time-slopes of length-at-age
# Calculate time-slopes by age and area
time_slopes_by_year_area <- d %>%
group_by(age_bc, area) %>% # center length at age for comparison across ages
mutate(length_mm_ct = length_mm / mean(length_mm)) %>%
ungroup() %>%
mutate(id = paste(age_bc, area, sep = ";")) %>%
split(.$id) %>%
purrr::map(~lm(length_mm_ct ~ catch_year, data = .x)) %>%
purrr::map_df(broom::tidy, .id = 'id') %>%
filter(term == 'catch_year') %>%
separate(id, into = c("age_bc", "area"), sep = ";") %>%
mutate(upr = estimate + std.error*2, lwr = estimate - std.error*2) %>%
mutate(id = paste(age_bc, area, sep = ";"))
#> group_by: 2 grouping variables (age_bc, area)
#> mutate (grouped): new variable 'length_mm_ct' (double) with 35,825 unique values and 0% NA
#> ungroup: no grouping variables
#> mutate: new variable 'id' (character) with 167 unique values and 0% NA
#> filter: removed 167 rows (50%), 167 rows remaining
#> mutate: new variable 'upr' (double) with 154 unique values and 9% NA
#> new variable 'lwr' (double) with 154 unique values and 9% NA
#> mutate: new variable 'id' (character) with 167 unique values and 0% NA
time_slopes_by_year_area
# Add sample size so that we can filter on that
sample_size <- d %>%
group_by(age_bc, area) %>%
summarise(n = n()) %>%
ungroup() %>%
mutate(id = paste(age_bc, area, sep = ";")) %>%
dplyr::select(n, id)
#> group_by: 2 grouping variables (age_bc, area)
#> summarise: now 167 rows and 3 columns, one group variable remaining (age_bc)
#> ungroup: no grouping variables
#> mutate: new variable 'id' (character) with 167 unique values and 0% NA
# Join sample size
time_slopes_by_year_area <- left_join(time_slopes_by_year_area, sample_size)
#> Joining, by = "id"
#> left_join: added one column (n)
#> > rows only in x 0
#> > rows only in y ( 0)
#> > matched rows 167
#> > =====
#> > rows total 167
# Plot effect sizes
time_slopes_by_year_area %>%
filter(n > 30) %>%
ggplot(., aes(reorder(age_bc, as.numeric(age_bc)), estimate, color = factor(area))) +
geom_point(position = position_dodge(width = 0.4)) +
geom_errorbar(aes(ymin = lwr, ymax = upr), width = 0.2,
position = position_dodge(width = 0.4)) +
geom_hline(yintercept = 0, linetype = "dashed") +
scale_color_brewer(palette = "Paired") +
facet_wrap(~factor(area), scales = "free") +
labs(x = "Age", y = "slope: size~time") +
theme(legend.position = "bottom")
#> filter: removed 53 rows (32%), 114 rows remaining

NULL
#> NULL
time_slopes_by_year_area %>%
filter(n > 30) %>%
ggplot(., aes(reorder(age_bc, as.numeric(age_bc)), estimate, color = factor(area))) +
geom_point(position = position_dodge(width = 0.4)) +
geom_errorbar(aes(ymin = lwr, ymax = upr), width = 0.2,
position = position_dodge(width = 0.4)) +
geom_hline(yintercept = 0, linetype = "dashed") +
scale_color_brewer(palette = "Paired") +
labs(x = "Age", y = "slope: size~time") +
NULL
#> filter: removed 53 rows (32%), 114 rows remaining

time_slopes_by_year_area %>%
filter(n > 30) %>%
ggplot(., aes(as.numeric(age_bc), estimate)) +
geom_point(position = position_dodge(width = 0.4)) +
geom_hline(yintercept = 0, linetype = "dashed") +
stat_smooth(method = "gam", formula = y ~ s(x, k = 3)) +
labs(x = "Age", y = "slope: size~time")
#> filter: removed 53 rows (32%), 114 rows remaining

NULL
#> NULL